Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
# Stacked barplot of the phylum-level composition of every sample.
# Uses genome_counts_filt, genome_metadata, sample_metadata and phylum_colors,
# which are defined outside this section.
genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome-sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # drop genome-sample pairs with zero abundance
  # grouping by phylum keeps the same stacking order of taxa in every bar
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
  geom_col(colour = "white", linewidth = 0.1) + # stacked bars with thin white borders
  scale_fill_manual(values = phylum_colors) +
  # nested facets: environment on the outer level, longitude inside it
  facet_nested(. ~ environment + longitude, scales = "free") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

5.1.2 Phylum relative abundances
# Relative abundance of each phylum in each sample
# (one row per sample-phylum combination, column `relabun`).
phylum_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # drop the grouping so later steps do not inherit a per-sample grouping
  summarise(relabun = sum(count), .groups = "drop")
# Mean and standard deviation of each phylum's relative abundance across
# samples, sorted by decreasing mean and rendered as a table.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
  arrange(-mean) %>%
  tt()

| phylum | mean | sd |
|---|---|---|
| p__Bacteroidota | 5.689285e-01 | 0.1540091282 |
| p__Bacillota_A | 1.743849e-01 | 0.0700812372 |
| p__Pseudomonadota | 1.113540e-01 | 0.1500626212 |
| p__Verrucomicrobiota | 6.181210e-02 | 0.0587656502 |
| p__Bacillota | 3.203183e-02 | 0.0403185065 |
| p__Desulfobacterota | 2.632570e-02 | 0.0270005049 |
| p__Fusobacteriota | 8.571358e-03 | 0.0145576339 |
| p__Bacillota_C | 6.103158e-03 | 0.0072686479 |
| p__Deferribacterota | 4.297397e-03 | 0.0057855372 |
| p__Cyanobacteriota | 2.749617e-03 | 0.0035676690 |
| p__Bacillota_B | 2.476044e-03 | 0.0022425682 |
| p__Elusimicrobiota | 8.931074e-04 | 0.0024994217 |
| p__Chlamydiota | 7.224535e-05 | 0.0001589365 |
# Character vector of phyla, ordered from highest to lowest mean relative
# abundance across samples.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)
# Jitter plot of per-sample relative abundance for each phylum.
# Factor levels are the reversed phylum_arrange order, so the phylum with the
# highest mean abundance is the last level on the y axis.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(
    aes(x = relabun, y = phylum, group = phylum, color = phylum)
  ) +
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  geom_jitter(alpha = 0.5) +
  theme_minimal() +
  theme(legend.position = "none") + # colour duplicates the y axis, so hide the legend
  labs(y = "Phylum", x = "Relative abundance")

5.2 Taxonomy boxplot
5.2.1 Family
# Relative abundance of each family in each sample
# (one row per sample-family combination, column `relabun`).
family_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome-sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # drop the grouping so later steps do not inherit a per-sample grouping
  summarise(relabun = sum(count), .groups = "drop")
# Mean and standard deviation of each family's relative abundance across
# samples, sorted by decreasing mean and rendered as a table.
family_summary %>%
  group_by(family) %>%
  summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
  arrange(-mean) %>%
  tt()

| family | mean | sd |
|---|---|---|
| f__Bacteroidaceae | 2.471960e-01 | 0.1325115215 |
| f__Rikenellaceae | 1.160874e-01 | 0.0684497647 |
| f__Tannerellaceae | 1.019561e-01 | 0.0569863869 |
| f__Akkermansiaceae | 6.066164e-02 | 0.0582934333 |
| f__Marinifilaceae | 5.988787e-02 | 0.0475335342 |
| f__Lachnospiraceae | 5.808688e-02 | 0.0415927321 |
| f__Ruminococcaceae | 5.115411e-02 | 0.0373298162 |
| f__Enterobacteriaceae | 4.838945e-02 | 0.1181156814 |
| f__Aeromonadaceae | 3.001583e-02 | 0.0460225099 |
| f__Desulfovibrionaceae | 2.632570e-02 | 0.0270005049 |
| f__ | 2.477546e-02 | 0.0212954773 |
| f__Erysipelotrichaceae | 1.773323e-02 | 0.0146642456 |
| f__Clostridiaceae | 1.667605e-02 | 0.0246251130 |
| f__Mycoplasmoidaceae | 1.307127e-02 | 0.0392437991 |
| f__Moraxellaceae | 1.269634e-02 | 0.0263873382 |
| f__Oscillospiraceae | 1.049036e-02 | 0.0073022058 |
| f__Cellulosilyticaceae | 9.674865e-03 | 0.0174362664 |
| f__Muribaculaceae | 9.275306e-03 | 0.0103502184 |
| f__Fusobacteriaceae | 8.571358e-03 | 0.0145576339 |
| f__CHK158-818 | 7.802093e-03 | 0.0092742369 |
| f__Anaerovoracaceae | 6.418852e-03 | 0.0101906138 |
| f__CAG-239 | 5.594649e-03 | 0.0085961651 |
| f__Butyricicoccaceae | 5.536744e-03 | 0.0153801549 |
| f__Peptostreptococcaceae | 4.616539e-03 | 0.0103902778 |
| f__P3 | 4.487408e-03 | 0.0075825882 |
| f__Mucispirillaceae | 4.297397e-03 | 0.0057855372 |
| f__Pseudomonadaceae | 3.189040e-03 | 0.0060054779 |
| f__Acutalibacteraceae | 3.081339e-03 | 0.0045602908 |
| f__UBA3637 | 2.817205e-03 | 0.0073111663 |
| f__Gastranaerophilaceae | 2.636235e-03 | 0.0035305519 |
| f__UBA932 | 2.456666e-03 | 0.0033990559 |
| f__Peptococcaceae | 2.400061e-03 | 0.0022145205 |
| f__Succinispiraceae | 2.313402e-03 | 0.0024808879 |
| f__Chromobacteriaceae | 2.301141e-03 | 0.0099058815 |
| f__Pumilibacteraceae | 2.194988e-03 | 0.0025294376 |
| f__Anaerotignaceae | 2.153835e-03 | 0.0024871306 |
| f__Massilibacillaceae | 2.101814e-03 | 0.0044747006 |
| f__Shewanellaceae | 1.007172e-03 | 0.0039548332 |
| f__Chitinibacteraceae | 1.000738e-03 | 0.0024966652 |
| f__UBA3830 | 9.957148e-04 | 0.0015639545 |
| f__Xanthobacteraceae | 9.310902e-04 | 0.0037227287 |
| f__Coprobacillaceae | 9.258861e-04 | 0.0021323691 |
| f__Elusimicrobiaceae | 8.931074e-04 | 0.0024994217 |
| f__Burkholderiaceae_A | 8.678700e-04 | 0.0024729975 |
| f__UBA1997 | 7.055924e-04 | 0.0021616219 |
| f__Coprobacteraceae | 6.020864e-04 | 0.0009582824 |
| f__Sedimentibacteraceae | 5.766507e-04 | 0.0007547715 |
| f__UBA1820 | 5.595296e-04 | 0.0008649613 |
| f__CAG-508 | 4.668877e-04 | 0.0025549082 |
| f__Eubacteriaceae | 3.514209e-04 | 0.0006269121 |
| f__Borkfalkiaceae | 2.249938e-04 | 0.0004160226 |
| f__CALYAR01 | 1.572887e-04 | 0.0002786771 |
| f__Enterococcaceae | 1.502679e-04 | 0.0008889969 |
| f__CALVMC01 | 9.823090e-05 | 0.0005216160 |
| f__UBA660 | 7.705595e-05 | 0.0001929114 |
| f__UBA7702 | 7.598288e-05 | 0.0001804883 |
| f__GCF-1484045 | 7.412128e-05 | 0.0004385074 |
| f__Chlamydiaceae | 7.224535e-05 | 0.0001589365 |
| f__UBA3700 | 6.138625e-05 | 0.0003631659 |
# Character vector of families, ordered from highest to lowest mean relative
# abundance across samples.
family_arrange <- family_summary %>%
  group_by(family) %>%
  # Previously sum() stored under the name `mean`. family_summary has one row
  # per sample for every family, so ranking by mean gives the same order while
  # matching the column name and the phylum-level code.
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(family)
# Jitter plot of per-sample relative abundance for the 20 top-ranked families,
# coloured by phylum, with one panel per environment.
family_summary %>%
  # attach the phylum of each family (one row per family-phylum pair)
  left_join(
    genome_metadata %>% distinct(family, phylum),
    by = join_by(family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # head() instead of [1:20]: no NA padding when fewer than 20 families exist
  filter(family %in% head(family_arrange, 20)) %>%
  mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
  filter(relabun > 0) %>% # hide zero abundances
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # NOTE(review): [-8] removes the 8th colour by position; presumably a phylum
  # that has no family among the top 20. Confirm against phylum_colors, since a
  # positional index silently breaks if that vector is reordered.
  scale_color_manual(values = phylum_colors[-8]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ environment) +
  theme_minimal() +
  labs(y = "Family", x = "Relative abundance", color = "Phylum")

5.2.2 Genus
# Relative abundance of each genus in each sample, keeping the phylum of the
# genus. Rows whose genus is the bare placeholder "g__" are removed and the
# "g__" prefix is stripped from the remaining names.
genus_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every sample column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome-sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # drop the grouping so later steps do not inherit a sample/phylum grouping
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__") %>%
  mutate(genus = sub("^g__", "", genus))
# Mean and standard deviation of each genus' relative abundance across
# samples, sorted by decreasing mean. Kept as an object because the genus
# ordering is reused when plotting.
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
  arrange(-mean)
# Render the sorted summary as a table.
genus_summary_sort %>%
  tt()

| genus | mean | sd |
|---|---|---|
| Bacteroides | 2.435662e-01 | 0.1318569731 |
| Parabacteroides | 7.712066e-02 | 0.0442873518 |
| Mucinivorans | 5.485809e-02 | 0.0441493383 |
| Odoribacter | 3.998704e-02 | 0.0324395582 |
| Akkermansia | 3.699454e-02 | 0.0444888608 |
| Aeromonas | 3.001583e-02 | 0.0460225099 |
| Parabacteroides_B | 2.483548e-02 | 0.0277427000 |
| Hafnia | 2.227064e-02 | 0.1156606985 |
| JADFUS01 | 1.771383e-02 | 0.0105985347 |
| Alistipes | 1.471418e-02 | 0.0109627462 |
| Plesiomonas | 1.454771e-02 | 0.0327940636 |
| UBA866 | 1.419549e-02 | 0.0171150172 |
| Clostridium | 1.372645e-02 | 0.0226197767 |
| Bilophila | 1.331878e-02 | 0.0198622466 |
| Acinetobacter | 1.269634e-02 | 0.0263873382 |
| 14-2 | 1.233104e-02 | 0.0265894295 |
| Clostridium_Q | 1.171644e-02 | 0.0162699532 |
| Mycoplasma_L | 1.065393e-02 | 0.0394829437 |
| Dielma | 1.042251e-02 | 0.0127504085 |
| CAJGBR01 | 9.607999e-03 | 0.0085952436 |
| HGM05232 | 9.275306e-03 | 0.0103502184 |
| Cetobacterium | 7.826324e-03 | 0.0142283544 |
| Gallibacteroides | 7.802093e-03 | 0.0092742369 |
| JAIHAL01 | 6.871180e-03 | 0.0126256687 |
| Angelakisella | 6.505557e-03 | 0.0057582329 |
| Buttiauxella | 6.133033e-03 | 0.0189800892 |
| RGIG3102 | 5.734736e-03 | 0.0099264590 |
| Hydrogenoanaerobacterium | 5.697736e-03 | 0.0062384576 |
| SZUA-378 | 4.928183e-03 | 0.0137349713 |
| Pseudoflavonifractor | 4.242479e-03 | 0.0043794439 |
| Anaerotruncus | 4.137290e-03 | 0.0042016918 |
| Anaerovorax | 4.013726e-03 | 0.0091630296 |
| Butyricimonas | 3.937359e-03 | 0.0043410385 |
| UMGS1251 | 3.563237e-03 | 0.0054497977 |
| Pseudomonas_E | 3.189040e-03 | 0.0060054779 |
| Sarcina | 2.949596e-03 | 0.0057753291 |
| Intestinimonas | 2.941046e-03 | 0.0028260997 |
| Mobilisporobacter | 2.538844e-03 | 0.0044476714 |
| Tidjanibacter | 2.526410e-03 | 0.0022745134 |
| Hungatella_A | 2.509723e-03 | 0.0030300415 |
| Bacteroides_G | 2.482777e-03 | 0.0030685072 |
| Egerieousia | 2.456666e-03 | 0.0033990559 |
| Malacoplasma | 2.417336e-03 | 0.0054607581 |
| Craterilacuibacter | 2.301141e-03 | 0.0099058815 |
| Alistipes_A | 2.241040e-03 | 0.0019536428 |
| Budvicia | 2.215264e-03 | 0.0109761121 |
| Avirikenella | 1.958926e-03 | 0.0027352825 |
| Paraclostridium | 1.949494e-03 | 0.0097500062 |
| Anaerorhabdus | 1.842258e-03 | 0.0027674736 |
| Serratia_A | 1.746917e-03 | 0.0067047320 |
| UMGS1202 | 1.735431e-03 | 0.0017442272 |
| JAGAJR01 | 1.657831e-03 | 0.0037723464 |
| Romboutsia_A | 1.631884e-03 | 0.0027175524 |
| Amedibacillus | 1.617023e-03 | 0.0030514424 |
| Gallalistipes | 1.467926e-03 | 0.0012004170 |
| JAAYQI01 | 1.419084e-03 | 0.0023350948 |
| Intestinibacillus | 1.312996e-03 | 0.0016471339 |
| Phocea | 1.264862e-03 | 0.0022185638 |
| JAHHTP01 | 1.218787e-03 | 0.0014210421 |
| Massiliimalia | 1.160964e-03 | 0.0023829475 |
| RGIG4140 | 1.144305e-03 | 0.0064919092 |
| Aminipila | 1.100200e-03 | 0.0023493631 |
| Copranaerobaculum | 1.063109e-03 | 0.0042027196 |
| JAJBUQ01 | 1.044411e-03 | 0.0017072520 |
| Romboutsia_D | 1.035162e-03 | 0.0025533354 |
| Shewanella | 1.007172e-03 | 0.0039548332 |
| Ruthenibacterium | 1.000989e-03 | 0.0016556573 |
| Deefgea | 1.000738e-03 | 0.0024966652 |
| Bradyrhizobium | 9.310902e-04 | 0.0037227287 |
| Coprobacillus | 9.258861e-04 | 0.0021323691 |
| Rikenella | 8.870162e-04 | 0.0014086439 |
| JAEZVV01 | 8.678700e-04 | 0.0024729975 |
| RGIG7389 | 8.305951e-04 | 0.0010529671 |
| JAGNZR01 | 7.450332e-04 | 0.0026332724 |
| Kluyvera | 7.354109e-04 | 0.0033265669 |
| Bacilliculturomica | 7.159179e-04 | 0.0013304547 |
| Spyradomonas | 7.146271e-04 | 0.0012823418 |
| WRKB01 | 6.465109e-04 | 0.0015707534 |
| Evtepia | 6.395747e-04 | 0.0006982343 |
| Anaerotignum | 6.131710e-04 | 0.0012176488 |
| Coprobacter | 6.020864e-04 | 0.0009582824 |
| MGBC133411 | 5.894437e-04 | 0.0009156972 |
| Negativibacillus | 5.804496e-04 | 0.0006835800 |
| Robinsoniella | 5.353474e-04 | 0.0010897149 |
| IOR16 | 5.023546e-04 | 0.0007464032 |
| Muricomes | 4.994820e-04 | 0.0008485060 |
| UBA7488 | 4.750299e-04 | 0.0009978028 |
| RGIG8482 | 4.668877e-04 | 0.0025549082 |
| CAKVBE01 | 4.333183e-04 | 0.0014714300 |
| Citrobacter | 4.112408e-04 | 0.0013787481 |
| Faecalimonas | 3.688513e-04 | 0.0008326873 |
| Amedibacterium | 3.478068e-04 | 0.0016606843 |
| UBA1174 | 3.335192e-04 | 0.0019010146 |
| SIG603 | 3.300671e-04 | 0.0005805146 |
| Yersinia | 3.292357e-04 | 0.0012233173 |
| HGM16780 | 3.067475e-04 | 0.0013455312 |
| Fimivivens | 3.047193e-04 | 0.0004088726 |
| UBA1794 | 3.009228e-04 | 0.0005607342 |
| Longicatena | 2.583333e-04 | 0.0015283205 |
| Dysosmobacter | 2.037202e-04 | 0.0003972865 |
| JAGPHI01 | 1.805939e-04 | 0.0004150693 |
| Enterococcus | 1.502679e-04 | 0.0008889969 |
| CALXSC01 | 1.224638e-04 | 0.0004012663 |
| Hespellia | 1.135437e-04 | 0.0002278677 |
| Massilioclostridium | 1.117290e-04 | 0.0002591622 |
| 51-20 | 9.051798e-05 | 0.0005355116 |
| MGBC107952 | 7.705595e-05 | 0.0001929114 |
| Scatenecus | 7.620763e-05 | 0.0004062042 |
| Cryptoclostridium | 7.598288e-05 | 0.0001804883 |
| Lactonifactor | 6.094989e-05 | 0.0002283341 |
| CAZU01 | 4.914752e-05 | 0.0002907606 |
# Character vector of genera, ordered from highest to lowest summed relative
# abundance across samples.
# genus_summary already has the "g__" placeholder rows removed and the "g__"
# prefix stripped, so neither step is repeated here.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total = sum(relabun)) %>% # a sum, so no longer mislabelled `mean`
  arrange(desc(total)) %>%
  pull(genus)
# Jitter plot of per-sample relative abundance for every genus, coloured by
# phylum, with one panel per environment.
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # order genera by the mean abundance computed in genus_summary_sort
  mutate(genus = factor(genus, levels = rev(pull(genus_summary_sort, genus)))) %>%
  filter(relabun > 0) %>% # hide zero abundances
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ environment) +
  theme_minimal() +
  # the y axis shows genera; the label previously read "Family"
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")